writeLines("td, th { padding : 3px } th { background-color:white; color:black; border:1px solid black; text-align:center } td {color:black; border:1px solid black; word-wrap:break-word; white-space:nowrap; overflow: hidden; text-overflow: ellipsis; max-width:300px; text-align:left}", con= "../css/style.css")# Plot Boxplots faceted by shared metabolites
################################################################################
p <- plot_df %>%
ggplot(aes(y = METABOLITE_NAME, x = VALUE, color = REPLICATE)) +
geom_boxplot(alpha = 0.8) +
labs(title=paste0(tissue,' ',study_institute_metab_family,": Original Metabolite Abundances"),
x = "Abundance", y = "")
plot(p)# Identify Zero/Missing Values in Metabolites
################################################################################
na_df <- plot_df %>%
ungroup() %>%
group_by(METABOLITE_NAME, REPLICATE) %>%
mutate(ZERO_LOG = ifelse(VALUE == 0, 1, 0)) %>%
mutate(ZERO_N = sum(ZERO_LOG)) %>%
mutate(ZERO_FREQ = round(ZERO_N/length(shared_sams), digits = 2)) %>%
mutate(NA_LOG = ifelse(is.na(VALUE), 1, 0)) %>%
mutate(NA_N = sum(NA_LOG)) %>%
mutate(NA_FREQ = round(NA_N/length(shared_sams), digits = 2)) %>%
select(REPLICATE, METABOLITE_NAME, ZERO_N, ZERO_FREQ, NA_N, NA_FREQ) %>%
unique() %>%
arrange(REPLICATE, desc(ZERO_FREQ), desc(NA_FREQ)) %>%
ungroup()
na_df %>%
knitr::kable(format = "html") %>%
scroll_box(width = "100%", height = "400px")| REPLICATE | METABOLITE_NAME | ZERO_N | ZERO_FREQ | NA_N | NA_FREQ |
|---|---|---|---|---|---|
| 512 | Cer(d18:0/14:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:0/16:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:1/18:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:1/20:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:1/22:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:1/24:0) | 0 | 0 | 0 | 0 |
| 512 | Cer(d18:1/24:1) | 0 | 0 | 0 | 0 |
| 512 | Sphinganine 1-phosphate | 0 | 0 | 0 | 0 |
| 512 | Sphinganine | 0 | 0 | 0 | 0 |
| 512 | Sphingosine | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:0/14:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:0/16:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:1/18:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:1/20:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:1/22:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:1/24:0) | 0 | 0 | 0 | 0 |
| 513 | Cer(d18:1/24:1) | 0 | 0 | 0 | 0 |
| 513 | Sphinganine 1-phosphate | 0 | 0 | 0 | 0 |
| 513 | Sphinganine | 0 | 0 | 0 | 0 |
| 513 | Sphingosine | 0 | 0 | 0 | 0 |
# Remove Metabolites with high NAor Zero Frequencies
################################################################################
met_rm <- na_df %>%
filter((ZERO_FREQ >= 0.95) | (NA_FREQ >= 0.8)) %>%
select(METABOLITE_NAME) %>% unlist() %>% unique()
metabolites <- metabolites[metabolites %!in% met_rm]
plot_df <- plot_df %>%
filter(METABOLITE_NAME %in% metabolites)
plot_df$METABOLITE_NAME <- as.character(plot_df$METABOLITE_NAME)# Summary Statistics of Density plot distribution
################################################################################
# Iterate through the runs and collect vectors
df_all <- data.frame()
for(rep in c(reps)){
# Collect numeric vector
num_vec <- plot_df %>%
filter(REPLICATE == rep) %>%
select(VALUE) %>% unlist() %>% unname()
df <- NumericSummaryStats(num_vec)
row.names(df) <- rep
df_all <- rbind(df_all, df)
}
df_all %>%
knitr::kable(format = "html") %>%
scroll_box(width = "100%", height = "200px")| NA_COUNT | NA_FREQ | MEAN | MEDIAN | MAX | MIN | MID_RANGE | VARIANCE | STD_DEV | SE_MEAN | Q1 | Q3 | KURTOSIS | SKEW | BC_LAMBDA | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 513 | 0 | 0 | 15.94 | 4.07 | 150.30 | 0.03 | 150.27 | 875.74 | 29.59 | 1.06 | 0.71 | 14.18 | 5.99 | 2.59 | 0.1 |
| 512 | 0 | 0 | 15.38 | 4.03 | 164.72 | 0.03 | 164.69 | 869.17 | 29.48 | 1.06 | 0.64 | 13.80 | 7.68 | 2.82 | 0 |
# Plot the density plot for all the gene counts
################################################################################
plot_df %>%
ggplot(aes(x = VALUE, color = REPLICATE)) +
geom_density() +
xlim(min(df_all$MIN), mean(df_all$Q3)) +
labs(title=paste0(tissue,' ',study_institute_metab_family,": Original Metabolite Abundances"),
x = "Abundance",
y = "Density")################################################################################
####################### Metabolite by Metabolite Correlations ##################
################################################################################
# Subset Matrices
x <- plot_df %>%
unite(METABOLITE_NAME,REPLICATE,
col = METABOLITE_NAME_REPLICATE, remove = F) %>%
select(METABOLITE_NAME_REPLICATE, labelid, VALUE,REPLICATE) %>%
split(.$REPLICATE) %>%
map(select, -REPLICATE) %>%
map(pivot_wider, names_from = METABOLITE_NAME_REPLICATE, values_from = VALUE) %>%
map(arrange, labelid) %>%
map(tibble::column_to_rownames, var = "labelid") %>%
map(as.matrix)
# Subset matrices
if(all(row.names(x[[1]]) == row.names(x[[2]]))){
shared_met_mat <- do.call(cbind,x)
}
Corr <- 'Spearman'
# Create an NxN Matrix of correlation
if(Corr == 'Pearson'){
cor1 <- stats::cor(shared_met_mat, method = 'pearson') %>% round(digits = 3)
}else if(Corr == 'Spearman'){
cor1 <- stats::cor(shared_met_mat, method = 'spearman') %>% round(digits = 3)
}
# Reorder the correlation matrix
cormat <- cor1
# Melt the correlation matrix
melted_cormat <- melt(cormat, na.rm = TRUE)
################################################################################
####################### Only Reciprocal Pairs ##################################
################################################################################
# Remove rows that compare metabolites from the same dataset, them remove redundent measurements
melted_cormat2 <- melted_cormat
# Widen the datafrmae back to matrix
cormat2 <- melted_cormat2 %>%
pivot_wider(names_from = Var2, values_from = value) %>%
as.data.frame()
row.names(cormat2) <- cormat2$Var1
cormat2 <- cormat2 %>%
select(-Var1) %>%
as.matrix()
cormat2 <- cormat2[sort(rownames(cormat2)),sort(colnames(cormat2))]
cormat2 <- cormat2[grepl(reps[1], rownames(cormat2)),grepl(reps[2], colnames(cormat2))]
# Orient the colors and breaks
paletteLength <- 1000
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1, 0, length.out=ceiling(paletteLength/2) + 1),
seq(1/paletteLength, max(cor1, na.rm = T), length.out=floor(paletteLength/2)))
# Plot the heatmap
pheatmap(cormat2, color=myColor, breaks=myBreaks,
cluster_rows = F, cluster_cols = F,fontsize = 6)# Subset the data for sites to compare (scatterplots)
###################################################
plot_join_df <- countdata_df %>%
ungroup() %>%
unite(STUDY_INSTITUTE,METAB_FAMILY, col = "STUDY_INSTITUTE_METAB_FAMILY") %>%
filter(TISSUE == tissue) %>%
filter(NAMED == named) %>%
filter(STUDY_INSTITUTE_METAB_FAMILY == study_institute_metab_family) %>%
unnest(COUNT_DATA) %>%
filter(viallabel %in% as.character(unlist(sample_join$sample_id))) %>%
filter(METABOLITE_NAME %in% metabolites) %>%
unite(labelid, viallabel, col = "labelid_viallabel", remove = F) %>%
mutate(REPLICATE = case_when(grepl(paste0(reps[1],"$"),labelid_viallabel) ~ reps[1],
grepl(paste0(reps[2],"$"),labelid_viallabel) ~ reps[2],
)) %>%
select(REPLICATE,METABOLITE_NAME,labelid,VALUE) %>%
mutate(REPLICATE = as.character(REPLICATE)) %>%
split(.$REPLICATE) %>%
map(~rename(., !!sym(unique(.$REPLICATE)) := "VALUE")) %>%
map(~select(., -REPLICATE)) %>%
purrr::reduce(left_join, by = c("labelid","METABOLITE_NAME"))
# Plot scatter plots
################################################################################
# Plot scatter plots ordered by sample (include R2 values)
lm_df <- plot_join_df %>%
rename(name = METABOLITE_NAME) %>%
rename(x = reps[1]) %>%
rename(y = reps[2])
# Iterate through each of the shared metabolites to collect summary info about their correlations
r2_df <- data.frame()
for(metab in metabolites){
met_df <- lm_df %>%
filter(name == metab)
r2_df <- rbind(r2_df, lm_eqn(met_df))
}
# Display summaries
met_r2_df <- r2_df %>%
arrange(METABOLITE) %>%
select(METABOLITE,R2) %>%
arrange(desc(R2))
met_r2_df %>%
knitr::kable(format = "html") %>%
scroll_box(width = "100%", height = "200px")| METABOLITE | R2 |
|---|---|
| Cer(d18:0/16:0) | 0.167 |
| Sphinganine | 0.132 |
| Cer(d18:1/18:0) | 0.112 |
| Cer(d18:0/14:0) | 0.0971 |
| Cer(d18:1/24:0) | 0.0633 |
| Cer(d18:1/22:0) | 0.0464 |
| Sphingosine | 0.0284 |
| Cer(d18:1/20:0) | 0.00805 |
| Cer(d18:1/24:1) | 0.00425 |
| Sphinganine 1-phosphate | 0.0000808 |
# Collect the order of metabolites
scat_met_order <- met_r2_df %>% select(METABOLITE) %>% unlist() %>% as.character()
# Plot the scatter plots
for(metab in scat_met_order){
p <- plot_join_df %>%
filter(METABOLITE_NAME == metab) %>%
ggplot(aes(x = !!sym(reps[1]), y = !!sym(reps[2])), color = ) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", size = 1) +
geom_abline(linetype="dashed") +
facet_wrap(~ METABOLITE_NAME) +
expand_limits(x = 0, y = 0) +
#labs(title=paste0(tissue,' ',study_institute_metab_family,": Normalized Metabolite Abundances")) +
coord_fixed(ratio=1)
plot(p)
}################################################################################
####################### Sample by Sample Correlations ##########################
################################################################################
# Subset Data
################################################################################
# Original NxP Matrix
x <- plot_df %>%
unite(labelid,REPLICATE,
col = labelid_REPLICATE, remove = F) %>%
select(labelid_REPLICATE, METABOLITE_NAME, VALUE,REPLICATE) %>%
split(.$REPLICATE) %>%
map(select, -REPLICATE) %>%
map(pivot_wider, names_from = METABOLITE_NAME, values_from = VALUE) %>%
map(tibble::column_to_rownames, var = "labelid_REPLICATE") %>%
map(as.matrix)
# Subset matrices
shared_sam_mat <- do.call(rbind,x) %>%
t()
# Create an NxN Matrix of correlation
Corr <- 'Spearman'
if(Corr == 'Pearson'){
cor1 <- stats::cor(shared_sam_mat, method = 'pearson') %>% round(digits = 3)
}else if(Corr == 'Spearman'){
cor1 <- stats::cor(shared_sam_mat, method = 'spearman') %>% round(digits = 3)
}
# Melt the correlation matrix
melted_cormat <- melt(cor1, na.rm = TRUE)
################################################################################
####################### Unclustered ############################################
################################################################################
# Remove rows that compare metabolites from the same dataset, them remove redundent measurements
melted_cormat2 <- melted_cormat
# Widen the dataframwe back to matrix
cormat2 <- melted_cormat2 %>%
pivot_wider(names_from = Var2, values_from = value) %>%
as.data.frame()
row.names(cormat2) <- cormat2$Var1
cormat2 <- cormat2 %>%
select(-Var1) %>%
as.matrix()
cormat2 <- cormat2[sort(rownames(cormat2)),sort(colnames(cormat2))]
cormat2 <- cormat2[grepl(reps[1], rownames(cormat2)),grepl(reps[2], colnames(cormat2))]
# Orient the colors and breaks
paletteLength <- 1000
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1, 0, length.out=ceiling(paletteLength/2) + 1),
seq(1/paletteLength, max(cor1, na.rm = TRUE), length.out=floor(paletteLength/2)))
# Plot the heatmap
pheatmap(as.matrix(cormat2), color=myColor, breaks=myBreaks,
cluster_rows = F, cluster_cols = F,fontsize = 6)pca <- prcomp(na.omit(t(shared_sam_mat)), scale. = F)
percentVar <- pca$sdev^2/sum(pca$sdev^2)
PC <- pca$x %>% as.data.frame()
PC$labelid_rep <- as.character(row.names(pca$x))
PC <- PC %>%
tidyr::separate(col = labelid_rep, into = c('labelid', 'REPLICATE'),
sep = "_", remove = T)
# Join the phenotype data
PC <- left_join(PC, pheno_df, by = 'labelid')
p0 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
theme(aspect.ratio=1)
p0p1 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=Key.anirandgroup, shape = REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
scale_color_manual(values=ec_colors) +
theme(aspect.ratio=1)
p1PC$Registration.sex <- as.character(PC$Registration.sex)
p2 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=Registration.sex, shape = REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
theme(aspect.ratio=1)
p2# Normalize
# Note in this step, we use labelid to account for mayo's duplicate samples
################################################################################
norm_df <- plot_df %>%
unite(labelid, viallabel, col = "labelid_viallabel", remove = F) %>%
split(.$REPLICATE) %>%
map(select, labelid_viallabel, METABOLITE_NAME, VALUE) %>%
map(pivot_wider, names_from = METABOLITE_NAME, values_from = VALUE) %>%
map(column_to_rownames, var = "labelid_viallabel") %>%
map(as.matrix) %>%
map(AutoScaleMatrix) %>%
map(as.data.frame) %>%
map(rownames_to_column, var = "labelid_viallabel") %>%
map(pivot_longer, names_to = 'METABOLITE_NAME', values_to = 'VALUE', cols = all_of(metabolites)) %>%
map(mutate, REPLICATE = case_when(grepl(paste0(reps[1],"$"),labelid_viallabel) ~ reps[1],
grepl(paste0(reps[2],"$"),labelid_viallabel) ~ reps[2],
grepl(paste0(reps[3],"$"),labelid_viallabel) ~ reps[3])) %>%
bind_rows()# Plot Boxplots faceted by shared metabolites
################################################################################
norm_df %>%
ggplot(aes(y = METABOLITE_NAME, x = VALUE, color = REPLICATE)) +
geom_boxplot(alpha = 0.8) +
labs(title=paste0(tissue,' ',study_institute_metab_family,": Normalized Metabolite Abundances"),
x = "Abundance", y = "") ################################################################################
####################### Metabolite by Metabolite Correlations ##################
################################################################################
# Subset Matrices
x <- plot_df %>%
unite(METABOLITE_NAME,REPLICATE,
col = METABOLITE_NAME_REPLICATE, remove = F) %>%
select(METABOLITE_NAME_REPLICATE, labelid, VALUE,REPLICATE) %>%
split(.$REPLICATE) %>%
map(select, -REPLICATE) %>%
map(pivot_wider, names_from = METABOLITE_NAME_REPLICATE, values_from = VALUE) %>%
map(arrange, labelid) %>%
map(tibble::column_to_rownames, var = "labelid") %>%
map(as.matrix) %>%
map(AutoScaleMatrix)
# Subset matrices
if(all(row.names(x[[1]]) == row.names(x[[2]]))){
shared_met_mat <- do.call(cbind,x)
}
Corr <- 'Spearman'
# Create an NxN Matrix of correlation
if(Corr == 'Pearson'){
cor1 <- stats::cor(shared_met_mat, method = 'pearson') %>% round(digits = 3)
}else if(Corr == 'Spearman'){
cor1 <- stats::cor(shared_met_mat, method = 'spearman') %>% round(digits = 3)
}
# Reorder the correlation matrix
cormat <- cor1
# Melt the correlation matrix
melted_cormat <- melt(cormat, na.rm = TRUE)
################################################################################
####################### Only Reciprocal Pairs ##################################
################################################################################
# Remove rows that compare metabolites from the same dataset, them remove redundent measurements
melted_cormat2 <- melted_cormat
# Widen the datafrmae back to matrix
cormat2 <- melted_cormat2 %>%
pivot_wider(names_from = Var2, values_from = value) %>%
as.data.frame()
row.names(cormat2) <- cormat2$Var1
cormat2 <- cormat2 %>%
select(-Var1) %>%
as.matrix()
cormat2 <- cormat2[sort(rownames(cormat2)),sort(colnames(cormat2))]
cormat2 <- cormat2[grepl(reps[1], rownames(cormat2)),grepl(reps[2], colnames(cormat2))]
# Orient the colors and breaks
paletteLength <- 1000
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1, 0, length.out=ceiling(paletteLength/2) + 1),
seq(1/paletteLength, max(cor1, na.rm = T), length.out=floor(paletteLength/2)))
# Plot the heatmap
pheatmap(cormat2, color=myColor, breaks=myBreaks,
cluster_rows = F, cluster_cols = F,fontsize = 6)# Normalize
# Subset the data for sites to compare (scatterplots)
###################################################
norm_join_df <- countdata_df %>%
ungroup() %>%
unite(STUDY_INSTITUTE,METAB_FAMILY, col = "STUDY_INSTITUTE_METAB_FAMILY") %>%
filter(TISSUE == tissue) %>%
filter(NAMED == named) %>%
filter(STUDY_INSTITUTE_METAB_FAMILY == study_institute_metab_family) %>%
unnest(COUNT_DATA) %>%
filter(viallabel %in% as.character(unlist(sample_join$sample_id))) %>%
unite(labelid, viallabel, col = "labelid_viallabel", remove = F) %>%
mutate(REPLICATE = case_when(grepl(paste0(reps[1],"$"),labelid_viallabel) ~ reps[1],
grepl(paste0(reps[2],"$"),labelid_viallabel) ~ reps[2],
grepl(paste0(reps[3],"$"),labelid_viallabel) ~ reps[3])) %>%
split(.$REPLICATE) %>%
map(select, labelid_viallabel, METABOLITE_NAME, VALUE) %>%
map(pivot_wider, names_from = METABOLITE_NAME, values_from = VALUE) %>%
map(column_to_rownames, var = "labelid_viallabel") %>%
map(as.matrix) %>%
map(AutoScaleMatrix) %>%
map(as.data.frame) %>%
map(rownames_to_column, var = "labelid_viallabel") %>%
map(pivot_longer, names_to = 'METABOLITE_NAME', values_to = 'VALUE', cols = all_of(metabolites)) %>%
map(mutate, REPLICATE = case_when(grepl(paste0(reps[1],"$"),labelid_viallabel) ~ reps[1],
grepl(paste0(reps[2],"$"),labelid_viallabel) ~ reps[2],
grepl(paste0(reps[3],"$"),labelid_viallabel) ~ reps[3])) %>%
map(~rename(., !!sym(unique(.$REPLICATE)) := "VALUE")) %>%
map(~select(., -REPLICATE)) %>%
map(mutate, labelid = gsub(pattern = "_..*", replacement = '', labelid_viallabel)) %>%
map(select, -labelid_viallabel) %>%
purrr::reduce(left_join, by = c("labelid","METABOLITE_NAME"))
# Plot scatter plots ordered by sample (include R2 values)
lm_df <- norm_join_df %>%
rename(name = METABOLITE_NAME) %>%
rename(x = reps[1]) %>%
rename(y = reps[2])
# Iterate through each of the shared metabolites to collect summary info about their correlations
r2_df <- data.frame()
for(metab in metabolites){
met_df <- lm_df %>%
filter(name == metab)
r2_df <- rbind(r2_df, lm_eqn(met_df))
}
# Display summaries
met_r2_df <- r2_df %>%
arrange(METABOLITE) %>%
select(METABOLITE,R2) %>%
arrange(desc(R2))
met_r2_df %>%
knitr::kable(format = "html") %>%
scroll_box(width = "100%", height = "200px")| METABOLITE | R2 |
|---|---|
| Cer(d18:0/16:0) | 0.167 |
| Sphinganine | 0.132 |
| Cer(d18:1/18:0) | 0.112 |
| Cer(d18:0/14:0) | 0.0971 |
| Cer(d18:1/24:0) | 0.0633 |
| Cer(d18:1/22:0) | 0.0464 |
| Sphingosine | 0.0284 |
| Cer(d18:1/20:0) | 0.00805 |
| Cer(d18:1/24:1) | 0.00425 |
| Sphinganine 1-phosphate | 0.0000808 |
# Collect the order of metabolites
scat_met_order <- met_r2_df %>% select(METABOLITE) %>% unlist() %>% as.character()
# Plot scatter plots (Normalized)
################################################################################
# Plot the scatter plots
for(metab in scat_met_order){
p <- norm_join_df %>%
filter(METABOLITE_NAME == metab) %>%
ggplot(aes(x = !!sym(reps[1]), y = !!sym(reps[2])), color = ) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", size = 1) +
geom_abline(linetype="dashed") +
facet_wrap(~ METABOLITE_NAME) +
expand_limits(x = 0, y = 0) +
#labs(title=paste0(tissue,' ',study_institute_metab_family,": Normalized Metabolite Abundances")) +
coord_fixed(ratio=1)
plot(p)
}# norm_join_df %>%
# mutate(METABOLITE_NAME = factor(METABOLITE_NAME, levels = scat_met_order)) %>%
# ggplot(aes(x = !!sym(reps[1]), y = !!sym(reps[2])), color = ) +
# geom_point(alpha = 0.5) +
# geom_smooth(method = "lm", size = 1) +
# geom_abline(linetype="dashed") +
# facet_wrap(~ METABOLITE_NAME) +
# expand_limits(x = 0, y = 0) +
# labs(title=paste0(tissue,' ',study_institute_metab_family,": Normalized Metabolite Abundances")) +
# coord_fixed(ratio=1)# Plot the density plot for all the gene counts
################################################################################
norm_df %>%
ggplot(aes(x = VALUE, color = REPLICATE)) +
geom_density() +
labs(title=paste0(tissue,' ',study_institute_metab_family,": Normalized Metabolite Abundances"),
x = "Abundance",
y = "Density")# Summary Statistics of Density plot distribution
################################################################################
# Iterate through the runs and collect vectors
df_all <- data.frame()
for(rep in reps){
# Collect numeric vector
num_vec <- norm_df %>%
filter(REPLICATE == rep) %>%
select(VALUE) %>% unlist() %>% unname()
df <- NumericSummaryStats(num_vec)
row.names(df) <- rep
df_all <- rbind(df_all, df)
}
df_all
#}################################################################################
####################### Sample by Sample Correlations ##########################
################################################################################
# Subset Data
################################################################################
# Original NxP Matrix
x <- plot_df %>%
unite(labelid,REPLICATE,
col = labelid_REPLICATE, remove = F) %>%
select(labelid_REPLICATE, METABOLITE_NAME, VALUE,REPLICATE) %>%
split(.$REPLICATE) %>%
map(select, -REPLICATE) %>%
map(pivot_wider, names_from = METABOLITE_NAME, values_from = VALUE) %>%
map(tibble::column_to_rownames, var = "labelid_REPLICATE") %>%
map(as.matrix) %>%
map(AutoScaleMatrix)
# Subset matrices
shared_sam_mat <- do.call(rbind,x) %>%
t()
# Create an NxN Matrix of correlation
Corr <- 'Spearman'
if(Corr == 'Pearson'){
cor1 <- stats::cor(shared_sam_mat, method = 'pearson') %>% round(digits = 3)
}else if(Corr == 'Spearman'){
cor1 <- stats::cor(shared_sam_mat, method = 'spearman') %>% round(digits = 3)
}
# Melt the correlation matrix
melted_cormat <- melt(cor1, na.rm = TRUE)
################################################################################
####################### Unclustered ############################################
################################################################################
# Remove rows that compare metabolites from the same dataset, them remove redundent measurements
melted_cormat2 <- melted_cormat
# Widen the dataframwe back to matrix
cormat2 <- melted_cormat2 %>%
pivot_wider(names_from = Var2, values_from = value) %>%
as.data.frame()
row.names(cormat2) <- cormat2$Var1
cormat2 <- cormat2 %>%
select(-Var1) %>%
as.matrix()
cormat2 <- cormat2[sort(rownames(cormat2)),sort(colnames(cormat2))]
cormat2 <- cormat2[grepl(reps[1], rownames(cormat2)),grepl(reps[2], colnames(cormat2))]
# Orient the colors and breaks
paletteLength <- 1000
myColor <- colorRampPalette(c("blue", "white", "red"))(paletteLength)
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1, 0, length.out=ceiling(paletteLength/2) + 1),
seq(1/paletteLength, max(cor1, na.rm = TRUE), length.out=floor(paletteLength/2)))
# Plot the heatmap
pheatmap(as.matrix(cormat2), color=myColor, breaks=myBreaks,
cluster_rows = F, cluster_cols = F,fontsize = 6)# Plot a PCA with outliers labeled
# shared_sam_mat <- shared_sam_mat %>%
# t() %>% AutoScaleMatrix() %>%
# t()
pca <- prcomp(na.omit(t(shared_sam_mat)), scale. = F)
percentVar <- pca$sdev^2/sum(pca$sdev^2)
PC <- pca$x %>% as.data.frame()
PC$labelid_rep <- as.character(row.names(pca$x))
PC <- PC %>%
tidyr::separate(col = labelid_rep, into = c('labelid', 'REPLICATE'),
sep = "_", remove = T)
# Join the phenotype data
PC <- left_join(PC, pheno_df, by = 'labelid')
p0 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
theme(aspect.ratio=1)
p0p1 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=Key.anirandgroup, shape = REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
scale_color_manual(values=ec_colors) +
theme(aspect.ratio=1)
p1PC$Registration.sex <- as.character(PC$Registration.sex)
p2 <- PC %>%
ggplot(aes(x = PC1, y = PC2, color=Registration.sex, shape = REPLICATE)) +
geom_point(size = 3) +
geom_line(aes(group = labelid),color="dark grey") +
ggtitle('Shared Samples and Shared Metabolites') +
xlab(paste0('PC1: ',round(percentVar[1]*100,2),'% variance')) +
ylab(paste0('PC2: ',round(percentVar[2]*100,2),'% variance')) +
theme(aspect.ratio=1)
p2